library_load <- suppressMessages(
list(
# Seurat
library(Seurat),
library(tidyverse),
# Biomart
library(biomaRt)
)
)
random_seed <- 42
set.seed(random_seed)
reticulate::use_python("~/bin/miniconda3/envs/r.4.1.0-FD20200109SPLENO/bin/python")
options(warn=-1)
# Set working directory to project root
setwd("/research/peer/fdeckert/FD20200109SPLENO")
# Source files
source("plotting_global.R")
source("bin/SeuratQC.R")
# SingleRPlot
source("bin/SingleRQC.R")
#### Filtering Parameter
nFeature_RNA_min_m <- 300
nFeature_RNA_min_p <- 300
nFeature_RNA_max_m <- 3000
nFeature_RNA_max_p <- 3000
nCount_RNA_min_m <- 1000
nCount_RNA_min_p <- 1000
nCount_RNA_max_m <- 20000
nCount_RNA_max_p <- 20000
pMt_RNA_max_m <- 5
pMt_RNA_max_p <- 7.5
# Files
so_raw_file <- "data/object/seurat_raw.rds"
so_qc_file <- "data/object/seurat.rds"
# Plotting Theme
ggplot2::theme_set(theme_global_set()) # From project global source()
so_raw <- readRDS(so_raw_file)
so_raw$nFeature_RNA_max <- ifelse(so_raw$tissue == "Myeloid", nFeature_RNA_max_m, nFeature_RNA_max_p)
so_raw$nFeature_RNA_min <- ifelse(so_raw$tissue == "Myeloid", nFeature_RNA_min_m, nFeature_RNA_min_p)
so_raw$nCount_RNA_max <- ifelse(so_raw$tissue == "Myeloid", nCount_RNA_max_m, nCount_RNA_max_p)
so_raw$nCount_RNA_min <- ifelse(so_raw$tissue == "Myeloid", nCount_RNA_min_m, nCount_RNA_min_p)
so_raw$pMt_RNA_max <- ifelse(so_raw$tissue == "Myeloid", pMt_RNA_max_m, pMt_RNA_max_p)
# QC
so_raw$qc_class <- ifelse(
so_raw$cellranger_class == "Cell" &
so_raw$nFeature_RNA <= so_raw$nFeature_RNA_max &
so_raw$nFeature_RNA >= so_raw$nFeature_RNA_min &
so_raw$nCount_RNA <= so_raw$nCount_RNA_max &
so_raw$nCount_RNA >= so_raw$nCount_RNA_min &
so_raw$pMt_RNA <= so_raw$pMt_RNA_max,
"pass", "fail"
)
so_raw$tissue <- factor(so_raw$tissue, levels=c("Myeloid", "Progenitor"))
so_raw$treatment <- factor(so_raw$treatment, levels=c("NaCl", "CpG"))
Empty droplets were determined with CellRanger V3.0.2 Lun et al., 2019 EmptyDrop heuristic. RNAse activity of granulocytes might be wrongly identified as empty cells by CellRanger.
Typical Sample A steep drop-off is indicative of good separation between the cell-associated barcodes and the barcodes associated with empty GEMs. A ideal barcode rank plot has a distincitve shape, which is referred to as a "cliff and knee".
Heterogeneous Sample Heterogeneous populations of cells in a sample result in two "cliff and knee" distributions. However, there should still be clear separation between the bacodes.
Compromised Sample Round curve and lack of steep cliff may indicate low sample quality or loss of single-cell behavior. This can be due to a wetting failure, premature cell lysis, or low cell viability.
Compromised Sample Defined cliff and knee, but the total number of barcodes detected may be lower than expected. This can be caused by a sample clog or inaccurate cell count.
options(repr.plot.width=7.5, repr.plot.height=5)
rank_plot_qc_1 <- rank_plot_qc(so_raw, color_color=color$cellranger_class, formular=tissue~treatment)
rank_plot_qc_1
ggsave(rank_plot_qc_1, filename="result/qc/plot/rank_plot_qc.png", width=15, height=10)
so_qc <- subset(so_raw, subset=cellranger_class == "Cell")
density_plot_qc_1 <- density_plot_qc(so=so_qc, title="Density plot UMI count", x=nCount_RNA, xlab="log10(UMI count)", min=nCount_RNA_min, max=nCount_RNA_max, fill_color=color$tissue)
density_plot_qc_2 <- density_plot_qc(so=so_qc, title="Density plot Feature count", x=nFeature_RNA, xlab="log10(Feature count)", min=nFeature_RNA_min, max=nFeature_RNA_max, fill_color=color$tissue)
density_plot_qc_3 <- density_plot_qc(so=so_qc, title="Density plot Mt %", x=pMt_RNA, xlab="Mt [%]", min=0, max=pMt_RNA_max, log10=FALSE, fill_color=color$tissue)
ggsave(density_plot_qc_1, filename="result/qc/plot/density_plot_qc_umi.png", width=5, height=5)
ggsave(density_plot_qc_2, filename="result/qc/plot/density_plot_qc_feature.png", width=5, height=5)
ggsave(density_plot_qc_3, filename="result/qc/plot/density_plot_qc_mt.png", width=5, height=5)
options(repr.plot.width=15, repr.plot.height=5)
density_plot_qc_1 + density_plot_qc_2 + density_plot_qc_3 + plot_layout(ncol=3) & theme(legend.position="bottom")
scattern_plot_qc_1 <- scattern_plot_qc(so=so_qc, title="Mitochondrial gene percentage", color=pMt_RNA)
scattern_plot_qc_2 <- scattern_plot_qc(so=so_qc, title="Hemoglobin gene percentage", color=pHb_RNA)
scattern_plot_qc_3 <- scattern_plot_qc(so=so_qc, title="Ribsonmal gene percentage", color=pRp_RNA)
ggsave(scattern_plot_qc_1, filename="result/qc/plot/sc_mt.png", width=5, height=5)
ggsave(scattern_plot_qc_2, filename="result/qc/plot/sc_hg.png", width=5, height=5)
ggsave(scattern_plot_qc_3, filename="result/qc/plot/sc_rb.png", width=5, height=5)
options(repr.plot.width=15, repr.plot.height=5)
scattern_plot_qc_1 + scattern_plot_qc_2 + scattern_plot_qc_3 + plot_layout(ncol=3) & theme(legend.position="bottom")
box_plot_qc_1 <- box_plot_qc(so=so_raw, y=nCount_RNA, fill=tissue, ylab="UMI [count]", ymin=0)
box_plot_qc_2 <- box_plot_qc(so=so_raw, y=nFeature_RNA, fill=tissue, ylab="Feature [count]", ymin=0)
box_plot_qc_3 <- box_plot_qc(so=so_raw, y=pMt_RNA, fill=tissue, ylab="Mt [%]", ymin=0, ymax=100)
box_plot_qc_4 <- box_plot_qc(so=so_raw, y=pHb_RNA, fill=tissue, ylab="Hb [%]", ymin=0, ymax=100)
box_plot_qc_5 <- box_plot_qc(so=so_raw, y=pRp_RNA, fill=tissue, ylab="Rbl [%]", ymin=0, ymax=100)
ggsave(box_plot_qc_1[[1]] + box_plot_qc_1[[2]] + plot_layout(guides="collect", ncol=2) & theme(legend.position="bottom"), filename="result/qc/plot/box_plot_qc_1.png", width=5, height=2.5)
ggsave(box_plot_qc_2[[1]] + box_plot_qc_2[[2]] + plot_layout(guides="collect", ncol=2) & theme(legend.position="bottom"), filename="result/qc/plot/box_plot_qc_2.png", width=5, height=2.5)
ggsave(box_plot_qc_3[[1]] + box_plot_qc_3[[2]] + plot_layout(guides="collect", ncol=2) & theme(legend.position="bottom"), filename="result/qc/plot/box_plot_qc_3.png", width=5, height=2.5)
ggsave(box_plot_qc_4[[1]] + box_plot_qc_4[[2]] + plot_layout(guides="collect", ncol=2) & theme(legend.position="bottom"), filename="result/qc/plot/box_plot_qc_4.png", width=5, height=2.5)
ggsave(box_plot_qc_5[[1]] + box_plot_qc_5[[2]] + plot_layout(guides="collect", ncol=2) & theme(legend.position="bottom"), filename="result/qc/plot/box_plot_qc_5.png", width=5, height=2.5)
options(repr.plot.width=15, repr.plot.height=10)
box_plot_qc_1[[1]] + box_plot_qc_1[[2]] + box_plot_qc_2[[1]] + box_plot_qc_2[[2]] +
box_plot_qc_3[[1]] + box_plot_qc_3[[2]] + box_plot_qc_4[[1]] + box_plot_qc_4[[2]] +
box_plot_qc_5[[1]] + box_plot_qc_5[[2]] + plot_spacer() + plot_spacer() + plot_layout(guides="collect", ncol=4) & theme(legend.position="bottom")
so_qc <- subset(so_qc, subset=nFeature_RNA >= nFeature_RNA_min & nFeature_RNA <= nFeature_RNA_max & nCount_RNA >= nCount_RNA_min & nCount_RNA <= nCount_RNA_max & pMt_RNA <= pMt_RNA_max)
SingleR identifies marker genes from the reference and uses them to compute assignment scores (based on the Spearman correlation across markers) for each cell in the test dataset against each label in the reference. The label with the highest score is the assigned to the test cell, possibly with further fine-tuning to resolve closely related labels.
first.labels: Labels before fine-tuning
labels: Labels after fine-tuning
pruning: labels after pruning
ref <- readRDS("data/haemosphere/se_haemosphere.rds")
# Seurat object to SingleCellExperiment
sce <- SingleCellExperiment(list(counts=so_qc@assays$RNA@counts))
# Predict labels
label_main <- SingleR::SingleR(test=sce, ref=ref, labels=ref$label.main, assay.type.test="counts", de.method="classic")
label_fine <- SingleR::SingleR(test=sce, ref=ref, labels=ref$label.fine, assay.type.test="counts", de.method="classic")
# Save predicted labels
saveRDS(label_main, "data/object/singler/label_main.rds")
saveRDS(label_fine, "data/object/singler/label_fine.rds")
# Add labels to Seurat object
label_main_meta <- as.data.frame(label_main) %>% dplyr::select(pruned.labels, tuning.scores.first) %>% dplyr::rename(main_labels=pruned.labels, main_delta_score=tuning.scores.first)
label_fine_meta <- as.data.frame(label_fine) %>% dplyr::select(pruned.labels, tuning.scores.first) %>% dplyr::rename(fine_labels=pruned.labels, fine_delta_score=tuning.scores.first)
so_qc <- AddMetaData(so_qc, cbind(label_main_meta, label_fine_meta))
# Set factor level for labels
so_qc$main_labels <- factor(so_qc$main_labels, levels=names(color$main_labels_haemosphere))
so_qc$fine_labels <- factor(so_qc$fine_labels, levels=names(color$fine_labels_haemosphere))
so_qc$ery_score <- label_main$scores[, "Ery"]
so_qc$main_labels <- factor(so_qc$main_labels, levels=names(color$main_labels_haemosphere))
boxplot_ery_score <- ggplot(so_qc@meta.data, aes(x=ery_score, color=main_labels)) +
geom_boxplot() +
scale_color_manual(values=color$main_labels_haemosphere) +
guides(colour=guide_legend(reverse=TRUE)) +
facet_grid(~tissue, scales="free_y")
count_module <- function(so, pattern, assay="RNA", slot="counts", stat="sum") {
genes <- rownames(so)[grep(pattern, rownames(so))]
cnt <- GetAssayData(so, assay=assay, slot=slot)
cnt <- as.matrix(cnt)
cnt <- cnt[rownames(cnt) %in% genes, ]
if(stat=="sum"){cnt_stat <- colSums(cnt)}
return(cnt_stat)
}
so_qc$cHb_RNA <- count_module(so_qc, pattern="Hba-|Hbb-|Hbq1b|Hbq1a")
boxplot_ery_count_module <- ggplot(so_qc@meta.data, aes(x=log10(cHb_RNA+1), color=main_labels)) +
geom_boxplot() +
scale_color_manual(values=color$main_labels_haemosphere) +
guides(colour=guide_legend(reverse=TRUE)) +
facet_grid(~tissue, scales="free_y")
boxplot_ery_count_feature <- ggplot(so_qc@meta.data, aes(x=nFeature_RNA, y=log10(cHb_RNA), color=main_labels)) +
geom_point() +
scale_color_manual(values=color$main_labels_haemosphere) +
guides(colour=guide_legend(reverse=TRUE)) +
facet_grid(~tissue, scales="free_y")
boxplot_ery_count_umi <- ggplot(so_qc@meta.data, aes(x=nCount_RNA, y=log10(cHb_RNA), color=main_labels)) +
geom_point() +
scale_color_manual(values=color$main_labels_haemosphere) +
guides(colour=guide_legend(reverse=TRUE)) +
facet_grid(~tissue, scales="free_y")
options(repr.plot.width=10, repr.plot.height=10, warn=-1)
boxplot_ery_score + boxplot_ery_count_module + boxplot_ery_count_feature + boxplot_ery_count_umi + plot_layout(guides="collect", ncol=2) & theme(legend.position="bottom")
# SingleRPlot
source("bin/SingleRQC.R")
main_labels_split <- split(label_main, f=so_qc$sample_group)
main_labels_score_hm <- list()
for(i in names(main_labels_split)) {main_labels_score_hm[[i]] <- SingleRScoreHeatMap(main_labels_split[[i]], color=color$main_labels_haemosphere, main=i)}
options(repr.plot.width=20, repr.plot.height=20)
main_labels_score_hm_grid <- gridExtra::arrangeGrob(grobs=main_labels_score_hm, ncol=1)
ggsave(main_labels_score_hm_grid, filename="result/qc/plot/main_labels_score_hm_grid.png", width=20, height=20)
grid::grid.draw(main_labels_score_hm_grid)
H2-K1 - Class I histocompatibility antigen, kappa-B alpha chain
H2-D1 - Class I histocompatibility antigen, D-B alpha chain
H2-L1 - not found
H2-Aa - Class II histocompatibility antigen, A-B alpha chain
H2-Ab1 - Class II histocompatibility antigen, A beta chain
H2-Eb1 - Class II histocompatibility antigen, I-E beta chain
H2-Eb2 - Class II histocompatibility antigen, E beta chain
Info: The antigens presented by class I peptides are derived from cytosolic proteins while calss II derived from extracellular proteins.
mhcI_genes <- c("H2-K1", "H2-D1")
mhcII_genes <- c("H2-Aa", "H2-Ab1", "H2-Eb1", "H2-Eb2")
so_qc <- AddModuleScore(so_qc, features=list(mhcI_genes), assays="RNA", slot="data", ctrl=100, nbin=25, name="msMHCI_RNA")
so_qc <- AddModuleScore(so_qc, features=list(mhcII_genes), assays="RNA", slot="data", ctrl=100, nbin=25, name="msMHCII_RNA")
Cd4 - T-cell surface glycoprotein CD4, Co-receptor for MHC class II, active on T-helper cells and triggers differentiation of monocytes into functional mature macrophages.
Cd8 - T-cell surface glycoprotein CD8, Co-receptor for MHC class I, CTL activattion and thymic selection of Cd8+ T cells.
cd4_genes <- c("Cd4")
cd8_genes <- c("Cd8a", "Cd8b1")
so_qc <- AddModuleScore(so_qc, features=list(cd4_genes), assays="RNA", slot="data", ctrl=100, nbin=25, name="msCd4_RNA")
so_qc <- AddModuleScore(so_qc, features=list(cd8_genes), assays="RNA", slot="data", ctrl=100, nbin=25, name="msCd8_RNA")
Trac - T cell receptor alpha constant
Trbc1 - T cell receptor beta constant 1
Trbc2 - T cell receptor beta constant 2
Trdc - T cell receptor delta constant
Trgc1 - T cell receptor gamma constant 1
Trgc2 - T cell receptor gamma constant 2
Cd247 - T-cell surface glycoprotein Cd3 zeta chain (Cd3z)
Cd3g - T-cell surface glycoprotein Cd3 gamma chain
Cd3e - T-cell surface glycoprotein Cd3 epsilon chain
Cd3d - T-cell surface glycoprotein Cd3 delta chain
tcr_genes <- c("Trac", "Trbc1", "Trbc2", "Trdc", "Trgc1", "Trgc2")
tcr_cd3_genes <- c("Cd247", "Cd3g", "Cd3e", "Cd3d")
so_qc <- AddModuleScore(so_qc, features=list(tcr_genes), assays="RNA", slot="data", ctrl=100, nbin=25, name="msTcr_RNA")
so_qc <- AddModuleScore(so_qc, features=list(tcr_cd3_genes), assays="RNA", slot="data", ctrl=100, nbin=25, name="msTcr_cd3_RNA")
Naive B-cells produce the following Ig classes:
Ighm - Immunoglobulin heavy constant mu (naive B-cells)
Ighd - Immunoglobulin heavy constant delta (naive B-cells)
Through isotope switching the following Ig classes can be produced:
Ighg1 - Immunoglobulin heavy constant gamma (Mouse with Igh1b have Igg2c isotope instead of Igg2a)
Ighg2a - Immunoglobulin heavy constant gamma (NA)
Ighg2b - Immunoglobulin heavy constant gamma
Ighg2c - Immunoglobulin heavy constant gamma
Ighg3 - Immunoglobulin heavy constant gamma
Igha - Immunoglobulin heavy constant alpha
Ighe - Immunoglobulin heavy constant epsilon (NA)
Igkc - Immunoglobulin kappa constant (light chain)
Iglc - Immunoglobulin lambda constant (light chain)
Ighm_genes <- c("Ighm")
Ighd_genes <- c("Ighd")
Ighg_genes <- c("Ighg1", "Ighg2a", "Ighg2b", "Ighg2c", "Ighg3")
Igha_genes <- c("Igha")
Ighe_genes <- c("Ighe")
Igkc_genes <- c("Igkc")
Iglc_genes <- c("Iglc1", "Iglc2", "Iglc3")
so_qc <- AddModuleScore(so_qc, features=list(Ighm_genes), assays="RNA", slot="data", ctrl=100, nbin=25, name="msIghm_RNA")
so_qc <- AddModuleScore(so_qc, features=list(Ighd_genes), assays="RNA", slot="data", ctrl=100, nbin=25, name="msIghd_RNA")
so_qc <- AddModuleScore(so_qc, features=list(Ighg_genes), assays="RNA", slot="data", ctrl=100, nbin=25, name="msIghg_RNA")
so_qc <- AddModuleScore(so_qc, features=list(Igha_genes), assays="RNA", slot="data", ctrl=100, nbin=25, name="msIgha_RNA")
#so_qc <- AddModuleScore(so_qc, features=list(Ighe_genes), assays="RNA", slot="data", ctrl=100, nbin=25, name="msIghe_RNA")
so_qc <- AddModuleScore(so_qc, features=list(Igkc_genes), assays="RNA", slot="data", ctrl=100, nbin=25, name="msIgkc_RNA")
so_qc <- AddModuleScore(so_qc, features=list(Iglc_genes), assays="RNA", slot="data", ctrl=100, nbin=25, name="msIglc_RNA")
cc_genes_seurat_s <- cc.genes.updated.2019$s.genes
cc_genes_seurat_g2m <- cc.genes.updated.2019$g2m.genes
# Set ssl for biomart
httr::set_config(httr::config(ssl_verifypeer=FALSE))
# ensembl_hgnc <- useMart("ensembl", dataset="hsapiens_gene_ensembl")
# ensembl_mm <- useMart("ensembl", dataset="mmusculus_gene_ensembl")
# saveRDS(ensembl_hgnc, "data/mm10/biomart/ensembl_hgnc.rds")
# saveRDS(ensembl_mm, "data/mm10/biomart/ensembl_mm.rds")
ensembl_hgnc <- readRDS("data/mm10/biomart/ensembl_hgnc.rds")
ensembl_mm <- readRDS("data/mm10/biomart/ensembl_mm.rds")
cc_genes_seurat_s <- getLDS(attributes=c("hgnc_symbol"), filters="hgnc_symbol", values=cc_genes_seurat_s, mart=ensembl_hgnc, attributesL=c("mgi_symbol"), martL=ensembl_mm, uniqueRows=T)
cc_genes_seurat_s <- cc_genes_seurat_s[, 2]
cc_genes_seurat_g2m <- getLDS(attributes=c("hgnc_symbol"), filters="hgnc_symbol", values=cc_genes_seurat_g2m, mart=ensembl_hgnc, attributesL=c("mgi_symbol"), martL=ensembl_mm, uniqueRows=T)
cc_genes_seurat_g2m <- cc_genes_seurat_g2m[, 2]
so_qc <- CellCycleScoring(so_qc, s.features=cc_genes_seurat_s, g2m.features=cc_genes_seurat_g2m, set.ident=FALSE)
colnames(so_qc@meta.data) <- gsub("Phase", "cc_phase_class", colnames(so_qc@meta.data))
colnames(so_qc@meta.data) <- gsub("S.Score", "msS_RNA", colnames(so_qc@meta.data))
colnames(so_qc@meta.data) <- gsub("G2M.Score", "msG2M_RNA", colnames(so_qc@meta.data))
so_qc$msCC_diff_RNA <- so_qc$msS_RNA - so_qc$msG2M_RNA
so_qc$cc_phase_class <- factor(so_qc$cc_phase_class, levels=names(color$cc_phase_class))
options(warn=-1)
so_qc_sample_group <- SplitObject(so_qc, split.by="sample_group")
so_qc_sample_group <- lapply(so_qc_sample_group, function(so) {
so <- SCTransform(so, assay="RNA", vars.to.regress=c("msCC_diff_RNA"), seed.use=random_seed, verbose=FALSE)
so <- RunPCA(so, npcs=100, verbose=FALSE)
so <- FindNeighbors(so, dims=1:100, k.param=20, verbose=FALSE)
so <- FindClusters(so, algorithm=4, random.seed=random_seed, verbose=FALSE)
so <- RunUMAP(so, dims=1:100, umap.method="umap-learn", seed.use=random_seed, verbose=FALSE)
return(so)
}
)
dplot_sample_group <- lapply(so_qc_sample_group, dplot_1, cluster="SCT_snn_res.0.8")
ggsave(dplot_sample_group[["Myeloid_NaCl"]], filename="result/qc/plot/dplot_sample_group_m_nacl.png", width=15, height=10)
ggsave(dplot_sample_group[["Progenitor_NaCl"]], filename="result/qc/plot/dplot_sample_group_p_nacl.png", width=15, height=10)
ggsave(dplot_sample_group[["Myeloid_CpG"]], filename="result/qc/plot/dplot_sample_group_m_cpg.png", width=15, height=10)
ggsave(dplot_sample_group[["Progenitor_CpG"]], filename="result/qc/plot/dplot_sample_group_p_cpg.png", width=15, height=10)
fplot_sample_group <- lapply(so_qc_sample_group, fplot_1)
ggsave(fplot_sample_group[["Myeloid_NaCl"]], filename="result/qc/plot/fplot_sample_group_m_nacl.png", width=15, height=10)
ggsave(fplot_sample_group[["Progenitor_NaCl"]], filename="result/qc/plot/fplot_sample_group_p_nacl.png", width=15, height=10)
ggsave(fplot_sample_group[["Myeloid_CpG"]], filename="result/qc/plot/fplot_sample_group_m_cpg.png", width=15, height=10)
ggsave(fplot_sample_group[["Progenitor_CpG"]], filename="result/qc/plot/fplot_sample_group_p_cpg.png", width=15, height=10)
options(repr.plot.width=15, repr.plot.height=10)
dplot_sample_group[["Myeloid_NaCl"]]
fplot_sample_group[["Myeloid_NaCl"]]
options(repr.plot.width=15, repr.plot.height=10)
dplot_sample_group[["Progenitor_NaCl"]]
fplot_sample_group[["Progenitor_NaCl"]]
options(repr.plot.width=15, repr.plot.height=10)
dplot_sample_group[["Myeloid_CpG"]]
fplot_sample_group[["Myeloid_CpG"]]
options(repr.plot.width=15, repr.plot.height=10)
dplot_sample_group[["Progenitor_CpG"]]
fplot_sample_group[["Progenitor_CpG"]]
options(repr.plot.width=20, repr.plot.height=10)
so_qc_sample_group_pc_annotation <- lapply(so_qc_sample_group, pc_annotation, dims=100, ident="sample_group")
so_qc_sample_group_pc_annotation_plot <- lapply(so_qc_sample_group_pc_annotation, pc_annotation_plot)
so_qc_sample_group_pc_annotation_plot <-
so_qc_sample_group_pc_annotation_plot[["Myeloid_NaCl"]] +
so_qc_sample_group_pc_annotation_plot[["Progenitor_NaCl"]] +
so_qc_sample_group_pc_annotation_plot[["Myeloid_CpG"]] +
so_qc_sample_group_pc_annotation_plot[["Progenitor_CpG"]] +
plot_layout(ncol=2, guides="collect")
so_qc_sample_group_pc_annotation_plot
ggsave(so_qc_sample_group_pc_annotation_plot, filename="result/qc/plot/so_qc_sample_group_pc_loadings_plot.png", width=10, height=5)
options(repr.plot.width=10, repr.plot.height=10)
cluster_sample_rep <- function(so) {
box_plot <- ggplot(so@meta.data, aes(x=SCT_snn_res.0.8, fill=tissue, alpha=sample_rep)) +
geom_bar(stat="count", position="fill") +
scale_fill_manual(values=color$tissue) +
scale_alpha_manual(values=c(1, 0.7)) +
ggtitle(paste(so$tissue[1], so$treatment[1])) + xlab("Cluster") + ylab("Cell frequency") +
facet_grid(~tissue)
return(box_plot)
}
cluster_sample_rep_plot <- lapply(so_qc_sample_group, cluster_sample_rep)
cluster_sample_rep_plot <-
cluster_sample_rep_plot[[1]] +
cluster_sample_rep_plot[[2]] +
cluster_sample_rep_plot[[3]] +
cluster_sample_rep_plot[[4]] +
plot_layout(ncol=2, guides="collect") & theme(legend.position="bottom")
cluster_sample_rep_plot
options(warn=-1)
so_qc_treatment <- SplitObject(so_qc, split.by="treatment")
so_qc_treatment <- lapply(so_qc_treatment, function(so) {
so <- SCTransform(so, assay="RNA", vars.to.regress=c("msCC_diff_RNA"), seed.use=random_seed, verbose=FALSE)
so <- RunPCA(so, npcs=100, seed.use=random_seed, verbose=FALSE)
so <- FindNeighbors(so, dims=1:100, k.param=20, verbose=FALSE)
so <- FindClusters(so, algorithm=4, random.seed=random_seed, verbose=FALSE)
so <- RunUMAP(so, dims=1:100, umap.method="umap-learn", seed.use=random_seed, verbose=FALSE)
return(so)
}
)
dplot_treatment <- lapply(so_qc_treatment, dplot_1, cluster="SCT_snn_res.0.8")
ggsave(dplot_treatment[["NaCl"]], filename="result/qc/plot/dplot_treatment_cpg.png", width=15, height=10)
ggsave(dplot_treatment[["CpG"]], filename="result/qc/plot/dplot_treatment_nacl.png", width=15, height=10)
fplot_treatment <- lapply(so_qc_treatment, fplot_1)
ggsave(fplot_treatment[["NaCl"]], filename="result/qc/plot/fplot_treatment_cpg.png", width=15, height=10)
ggsave(fplot_treatment[["CpG"]], filename="result/qc/plot/fplot_treatment_nacl.png", width=15, height=10)
options(repr.plot.width=15, repr.plot.height=10)
dplot_treatment[["NaCl"]]
fplot_treatment[["NaCl"]]
options(repr.plot.width=15, repr.plot.height=10)
dplot_treatment[["CpG"]]
fplot_treatment[["CpG"]]
options(repr.plot.width=20, repr.plot.height=5)
so_qc_treatment_pc_annotation <- lapply(so_qc_treatment, pc_annotation, dims=100, ident="treatment")
so_qc_treatment_pc_annotation_plot <- lapply(so_qc_treatment_pc_annotation, pc_annotation_plot)
so_qc_treatment_pc_annotation_plot <-
so_qc_treatment_pc_annotation_plot[["NaCl"]] +
so_qc_treatment_pc_annotation_plot[["CpG"]] +
plot_layout(ncol=2, guides="collect")
so_qc_treatment_pc_annotation_plot
ggsave(so_qc_treatment_pc_annotation_plot, filename="result/qc/plot/so_qc_treatment_pca_loadings_plot.png", width=10, height=5)
options(repr.plot.width=20, repr.plot.height=5, warn=-1)
cluster_sample_rep <- function(so) {
box_plot <- ggplot(so@meta.data, aes(x=SCT_snn_res.0.8, fill=tissue, alpha=sample_rep)) +
geom_bar(stat="count", position="fill") +
scale_fill_manual(values=color$tissue) +
scale_alpha_manual(values=c(1, 0.7)) +
ggtitle(so$treatment[1]) + xlab("Cluster") + ylab("Cell frequency") +
facet_grid(~tissue)
return(box_plot)
}
cluster_sample_rep_plot <- lapply(so_qc_treatment, cluster_sample_rep)
cluster_sample_rep_plot <-
cluster_sample_rep_plot[[1]] +
cluster_sample_rep_plot[[2]] +
plot_layout(ncol=2, guides="collect") & theme(legend.position="bottom")
cluster_sample_rep_plot
options(repr.plot.width=10, repr.plot.height=5)
cluster_tissue_1 <- ggplot(so_qc_treatment[[1]]@meta.data, aes(x=SCT_snn_res.0.8, fill=tissue)) +
geom_bar(stat="count", position="fill") +
scale_fill_manual(values=color$tissue) +
ggtitle("CpG sample") + xlab("Cluster") + ylab("Cell frequency") +
theme(legend.position="bottom")
cluster_tissue_2 <- ggplot(so_qc_treatment[[2]]@meta.data, aes(x=SCT_snn_res.0.8, fill=tissue)) +
geom_bar(stat="count", position="fill") +
scale_fill_manual(values=color$tissue) +
ggtitle("NaCl sample") + xlab("Cluster") + ylab("Cell frequency") +
theme(legend.position="bottom")
cluster_tissue <- cluster_tissue_1 + cluster_tissue_2
cluster_tissue
ggsave(cluster_tissue, filename="result/qc/plot/cluster_tissue_treatment.png", width=10, height=2.5)
saveRDS(so_qc, so_qc_file)
saveRDS(so_qc_treatment[["NaCl"]], "data/object/seurat_sct_nacl.rds")
saveRDS(so_qc_treatment[["CpG"]], "data/object/seurat_sct_cpg.rds")
source("bin/SeuratFacility.R")
seurat2dir(so=so_qc, dir="data/object/seurat/", overwrite=TRUE)
seurat2dir(so=so_qc_treatment[["NaCl"]], dir="data/object/seurat_sct_nacl/", overwrite=TRUE)
seurat2dir(so=so_qc_treatment[["CpG"]], dir="data/object/seurat_sct_cpg/", overwrite=TRUE)
Creating output directory data/object/seurat/ Writing meta data to data/object/seurat/meta/meta.csv Writing assay data for the assays: RNA ... RNA counts ... RNA data ... RNA scale.data
Creating output directory data/object/seurat_sct_nacl/ Writing meta data to data/object/seurat_sct_nacl/meta/meta.csv Writing assay data for the assays: RNA, SCT ... RNA counts ... RNA data ... RNA scale.data ... SCT counts ... SCT data ... SCT scale.data Writing reductions: pca, umap ... pca ... umap
NULL
Creating output directory data/object/seurat_sct_cpg/ Writing meta data to data/object/seurat_sct_cpg/meta/meta.csv Writing assay data for the assays: RNA, SCT ... RNA counts ... RNA data ... RNA scale.data ... SCT counts ... SCT data ... SCT scale.data Writing reductions: pca, umap ... pca ... umap
NULL
sessionInfo()
R version 4.1.0 (2021-05-18) Platform: x86_64-conda-linux-gnu (64-bit) Running under: CentOS Linux 7 (Core) Matrix products: default BLAS/LAPACK: /home/fdeckert/bin/miniconda3/envs/r.4.1.0-FD20200109SPLENO/lib/libopenblasp-r0.3.15.so locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 [7] LC_PAPER=en_US.UTF-8 LC_NAME=C [9] LC_ADDRESS=C LC_TELEPHONE=C [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C attached base packages: [1] parallel stats4 grid stats graphics grDevices utils [8] datasets methods base other attached packages: [1] Matrix_1.3-4 msigdbr_7.4.1 [3] viridis_0.6.2 viridisLite_0.4.0 [5] SingleR_1.6.1 SingleCellExperiment_1.14.1 [7] SummarizedExperiment_1.22.0 Biobase_2.52.0 [9] GenomicRanges_1.44.0 GenomeInfoDb_1.28.0 [11] IRanges_2.26.0 S4Vectors_0.30.0 [13] BiocGenerics_0.38.0 MatrixGenerics_1.4.0 [15] matrixStats_0.61.0 ComplexHeatmap_2.8.0 [17] patchwork_1.1.1 RColorBrewer_1.1-2 [19] biomaRt_2.48.2 forcats_0.5.1 [21] stringr_1.4.0 dplyr_1.0.7 [23] purrr_0.3.4 readr_2.0.2 [25] tidyr_1.1.4 tibble_3.1.6 [27] ggplot2_3.3.5 tidyverse_1.3.1 [29] SeuratObject_4.0.2 Seurat_4.0.5 loaded via a namespace (and not attached): [1] utf8_1.2.2 reticulate_1.22 [3] tidyselect_1.1.1 RSQLite_2.2.5 [5] AnnotationDbi_1.54.0 htmlwidgets_1.5.4 [7] BiocParallel_1.26.0 Rtsne_0.15 [9] ScaledMatrix_1.0.0 munsell_0.5.0 [11] codetools_0.2-18 ica_1.0-2 [13] pbdZMQ_0.3-5 future_1.23.0 [15] miniUI_0.1.1.1 withr_2.4.2 [17] colorspace_2.0-2 filelock_1.0.2 [19] uuid_0.1-4 rstudioapi_0.13 [21] ROCR_1.0-11 tensor_1.5 [23] listenv_0.8.0 labeling_0.4.2 [25] repr_1.1.3 GenomeInfoDbData_1.2.6 [27] polyclip_1.10-0 farver_2.1.0 [29] bit64_4.0.5 rprojroot_2.0.2 [31] parallelly_1.28.1 vctrs_0.3.8 [33] generics_0.1.1 BiocFileCache_2.0.0 [35] R6_2.5.1 doParallel_1.0.16 [37] clue_0.3-59 rsvd_1.0.5 [39] DelayedArray_0.18.0 bitops_1.0-7 [41] spatstat.utils_2.2-0 cachem_1.0.6 [43] assertthat_0.2.1 promises_1.2.0.1 [45] scales_1.1.1 gtable_0.3.0 [47] beachmat_2.8.0 Cairo_1.5-12.2 [49] globals_0.14.0 goftest_1.2-3 [51] rlang_0.4.12 GlobalOptions_0.1.2 [53] splines_4.1.0 lazyeval_0.2.2 [55] spatstat.geom_2.3-0 broom_0.7.10 [57] reshape2_1.4.4 abind_1.4-5 [59] modelr_0.1.8 backports_1.3.0 [61] httpuv_1.6.3 tools_4.1.0 [63] ellipsis_0.3.2 spatstat.core_2.3-1 [65] ggridges_0.5.3 Rcpp_1.0.7 [67] plyr_1.8.6 sparseMatrixStats_1.4.0 [69] base64enc_0.1-3 progress_1.2.2 [71] zlibbioc_1.38.0 RCurl_1.98-1.3 [73] prettyunits_1.1.1 rpart_4.1-15 [75] deldir_1.0-6 pbapply_1.5-0 [77] GetoptLong_1.0.5 cowplot_1.1.1 [79] zoo_1.8-9 haven_2.4.3 [81] ggrepel_0.9.1 cluster_2.1.2 [83] here_1.0.1 fs_1.5.0 [85] magrittr_2.0.1 magick_2.7.2 [87] data.table_1.14.2 scattermore_0.7 [89] circlize_0.4.13 lmtest_0.9-39 [91] reprex_2.0.1 RANN_2.6.1 [93] fitdistrplus_1.1-6 hms_1.1.1 [95] mime_0.12 evaluate_0.14 [97] xtable_1.8-4 XML_3.99-0.8 [99] readxl_1.3.1 gridExtra_2.3 [101] shape_1.4.6 compiler_4.1.0 [103] KernSmooth_2.23-20 crayon_1.4.2 [105] htmltools_0.5.2 mgcv_1.8-36 [107] later_1.3.0 tzdb_0.2.0 [109] lubridate_1.8.0 DBI_1.1.1 [111] dbplyr_2.1.1 MASS_7.3-54 [113] rappdirs_0.3.3 babelgene_21.4 [115] cli_3.1.0 igraph_1.2.9 [117] pkgconfig_2.0.3 IRdisplay_1.0 [119] plotly_4.10.0 spatstat.sparse_2.0-0 [121] xml2_1.3.2 foreach_1.5.1 [123] XVector_0.32.0 rvest_1.0.2 [125] digest_0.6.28 sctransform_0.3.2 [127] RcppAnnoy_0.0.19 spatstat.data_2.1-0 [129] Biostrings_2.60.0 cellranger_1.1.0 [131] leiden_0.3.9 uwot_0.1.10 [133] DelayedMatrixStats_1.14.0 curl_4.3.2 [135] shiny_1.7.1 rjson_0.2.20 [137] lifecycle_1.0.1 nlme_3.1-152 [139] jsonlite_1.7.2 BiocNeighbors_1.10.0 [141] fansi_0.5.0 pillar_1.6.4 [143] lattice_0.20-44 KEGGREST_1.32.0 [145] fastmap_1.1.0 httr_1.4.2 [147] survival_3.2-11 glue_1.5.0 [149] png_0.1-7 iterators_1.0.13 [151] bit_4.0.4 stringi_1.7.5 [153] blob_1.2.1 BiocSingular_1.8.0 [155] memoise_2.0.0 IRkernel_1.2 [157] irlba_2.3.3 future.apply_1.8.1